# library(shiny)
library(dplyr)
library(ggplot2)
library(tidyr)
library(reshape2) # reshaping the data
library(corrplot) # correlation matrix
library(car) # applied regression
# Load the four salary data sets from CSV files in the working directory.
# NOTE(review): the contents of data_1 and data_2 are not described in this
# script -- confirm against the data source.
data_1 <- read.csv("./salary_data_cleaned.csv")
data_2 <- read.csv("./glassdoor_jobs.csv")
# data science job postings from Glassdoor.com for 2017-2018, 33 variables
data_3 <- read.csv("./eda_data.csv")
# data scientist salaries for 2024
data_4 <- read.csv("./salaries_2.csv")

# Preview the first rows of every data frame.
head(data_1, n = 10)
head(data_2, n = 5)
head(data_3, n = 5)
head(data_4, n = 5)
# Count the NA cells in each data frame; a result of 0 means no cell is NA.
# Empty strings are not NA, so they are not counted here.
sum(is.na(data_1))
[1] 0
sum(is.na(data_2))
[1] 0
sum(is.na(data_3))
[1] 0
sum(is.na(data_4))
[1] 0
# Convert empty strings in character columns to NA so that is.na() can
# detect them. The result has to be assigned back to each data frame;
# without the assignment the converted copy is only printed and the
# original data stays unchanged.
data_1 <- data_1 %>% mutate(across(where(is.character), ~ na_if(.x, "")))
data_2 <- data_2 %>% mutate(across(where(is.character), ~ na_if(.x, "")))
data_3 <- data_3 %>% mutate(across(where(is.character), ~ na_if(.x, "")))
data_4 <- data_4 %>% mutate(across(where(is.character), ~ na_if(.x, "")))
# Recount the NA cells in each data frame.
sum(is.na(data_1))
[1] 0
sum(is.na(data_2))
[1] 0
sum(is.na(data_3))
[1] 0
sum(is.na(data_4))
[1] 0
# Per-column summary of data_1 (the generic dispatches to the data.frame method).
summary(data_1)
Job.Title Salary.Estimate Job.Description
Length:742 Length:742 Length:742
Class :character Class :character Class :character
Mode :character Mode :character Mode :character
Rating Company.Name Location
Min. :-1.000 Length:742 Length:742
1st Qu.: 3.300 Class :character Class :character
Median : 3.700 Mode :character Mode :character
Mean : 3.619
3rd Qu.: 4.000
Max. : 5.000
Headquarters Size Founded
Length:742 Length:742 Min. : -1
Class :character Class :character 1st Qu.:1939
Mode :character Mode :character Median :1988
Mean :1837
3rd Qu.:2007
Max. :2019
Type.of.ownership Industry Sector
Length:742 Length:742 Length:742
Class :character Class :character Class :character
Mode :character Mode :character Mode :character
Revenue Competitors hourly
Length:742 Length:742 Min. :0.00000
Class :character Class :character 1st Qu.:0.00000
Mode :character Mode :character Median :0.00000
Mean :0.03234
3rd Qu.:0.00000
Max. :1.00000
employer_provided min_salary max_salary
Min. :0.00000 Min. : 10.00 Min. : 16.0
1st Qu.:0.00000 1st Qu.: 52.00 1st Qu.: 96.0
Median :0.00000 Median : 69.50 Median :124.0
Mean :0.02291 Mean : 74.07 Mean :127.2
3rd Qu.:0.00000 3rd Qu.: 91.00 3rd Qu.:155.0
Max. :1.00000 Max. :202.00 Max. :306.0
avg_salary company_txt job_state
Min. : 13.5 Length:742 Length:742
1st Qu.: 73.5 Class :character Class :character
Median : 97.5 Mode :character Mode :character
Mean :100.6
3rd Qu.:122.5
Max. :254.0
same_state age python_yn
Min. :0.000 Min. : -1.00 Min. :0.0000
1st Qu.:0.000 1st Qu.: 11.00 1st Qu.:0.0000
Median :1.000 Median : 24.00 Median :1.0000
Mean :0.558 Mean : 46.59 Mean :0.5283
3rd Qu.:1.000 3rd Qu.: 59.00 3rd Qu.:1.0000
Max. :1.000 Max. :276.00 Max. :1.0000
R_yn spark aws
Min. :0.000000 Min. :0.0000 Min. :0.0000
1st Qu.:0.000000 1st Qu.:0.0000 1st Qu.:0.0000
Median :0.000000 Median :0.0000 Median :0.0000
Mean :0.002695 Mean :0.2251 Mean :0.2372
3rd Qu.:0.000000 3rd Qu.:0.0000 3rd Qu.:0.0000
Max. :1.000000 Max. :1.0000 Max. :1.0000
excel
Min. :0.0000
1st Qu.:0.0000
Median :1.0000
Mean :0.5229
3rd Qu.:1.0000
Max. :1.0000
# Per-column summary of data_2 (the generic dispatches to the data.frame method).
summary(data_2)
X Job.Title Salary.Estimate
Min. : 0.0 Length:956 Length:956
1st Qu.:238.8 Class :character Class :character
Median :477.5 Mode :character Mode :character
Mean :477.5
3rd Qu.:716.2
Max. :955.0
Job.Description Rating Company.Name
Length:956 Min. :-1.000 Length:956
Class :character 1st Qu.: 3.300 Class :character
Mode :character Median : 3.800 Mode :character
Mean : 3.601
3rd Qu.: 4.200
Max. : 5.000
Location Headquarters Size
Length:956 Length:956 Length:956
Class :character Class :character Class :character
Mode :character Mode :character Mode :character
Founded Type.of.ownership Industry
Min. : -1 Length:956 Length:956
1st Qu.:1937 Class :character Class :character
Median :1992 Mode :character Mode :character
Mean :1775
3rd Qu.:2008
Max. :2019
Sector Revenue Competitors
Length:956 Length:956 Length:956
Class :character Class :character Class :character
Mode :character Mode :character Mode :character
# Per-column summary of data_3 (the generic dispatches to the data.frame method).
summary(data_3)
X Job.Title Salary.Estimate
Min. : 0.0 Length:742 Length:742
1st Qu.:185.2 Class :character Class :character
Median :370.5 Mode :character Mode :character
Mean :370.5
3rd Qu.:555.8
Max. :741.0
Job.Description Rating Company.Name
Length:742 Min. :-1.000 Length:742
Class :character 1st Qu.: 3.300 Class :character
Mode :character Median : 3.700 Mode :character
Mean : 3.619
3rd Qu.: 4.000
Max. : 5.000
Location Headquarters Size
Length:742 Length:742 Length:742
Class :character Class :character Class :character
Mode :character Mode :character Mode :character
Founded Type.of.ownership Industry
Min. : -1 Length:742 Length:742
1st Qu.:1939 Class :character Class :character
Median :1988 Mode :character Mode :character
Mean :1837
3rd Qu.:2007
Max. :2019
Sector Revenue Competitors
Length:742 Length:742 Length:742
Class :character Class :character Class :character
Mode :character Mode :character Mode :character
hourly employer_provided min_salary
Min. :0.00000 Min. :0.00000 Min. : 15.00
1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.: 52.00
Median :0.00000 Median :0.00000 Median : 69.50
Mean :0.03234 Mean :0.02291 Mean : 74.72
3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.: 91.00
Max. :1.00000 Max. :1.00000 Max. :202.00
max_salary avg_salary company_txt
Min. : 16.0 Min. : 13.5 Length:742
1st Qu.: 96.0 1st Qu.: 73.5 Class :character
Median :124.0 Median : 97.5 Mode :character
Mean :128.1 Mean :100.6
3rd Qu.:155.0 3rd Qu.:122.5
Max. :306.0 Max. :254.0
job_state same_state age
Length:742 Min. :0.000 Min. : -1.00
Class :character 1st Qu.:0.000 1st Qu.: 11.00
Mode :character Median :1.000 Median : 24.00
Mean :0.558 Mean : 46.59
3rd Qu.:1.000 3rd Qu.: 59.00
Max. :1.000 Max. :276.00
python_yn R_yn spark
Min. :0.0000 Min. :0.000000 Min. :0.0000
1st Qu.:0.0000 1st Qu.:0.000000 1st Qu.:0.0000
Median :1.0000 Median :0.000000 Median :0.0000
Mean :0.5283 Mean :0.002695 Mean :0.2251
3rd Qu.:1.0000 3rd Qu.:0.000000 3rd Qu.:0.0000
Max. :1.0000 Max. :1.000000 Max. :1.0000
aws excel job_simp
Min. :0.0000 Min. :0.0000 Length:742
1st Qu.:0.0000 1st Qu.:0.0000 Class :character
Median :0.0000 Median :1.0000 Mode :character
Mean :0.2372 Mean :0.5229
3rd Qu.:0.0000 3rd Qu.:1.0000
Max. :1.0000 Max. :1.0000
seniority desc_len num_comp
Length:742 Min. : 407 Min. :0.000
Class :character 1st Qu.: 2801 1st Qu.:0.000
Mode :character Median : 3731 Median :0.000
Mean : 3870 Mean :1.054
3rd Qu.: 4740 3rd Qu.:3.000
Max. :10051 Max. :4.000
# Per-column summary of data_4 (the generic dispatches to the data.frame method).
summary(data_4)
work_year experience_level employment_type
Min. :2020 Length:16494 Length:16494
1st Qu.:2023 Class :character Class :character
Median :2023 Mode :character Mode :character
Mean :2023
3rd Qu.:2024
Max. :2024
job_title salary salary_currency
Length:16494 Min. : 14000 Length:16494
Class :character 1st Qu.: 102000 Class :character
Mode :character Median : 142200 Mode :character
Mean : 163788
3rd Qu.: 187342
Max. :30400000
salary_in_usd employee_residence remote_ratio
Min. : 15000 Length:16494 Min. : 0.00
1st Qu.:101518 Class :character 1st Qu.: 0.00
Median :141300 Mode :character Median : 0.00
Mean :149714 Mean : 32.04
3rd Qu.:185900 3rd Qu.:100.00
Max. :800000 Max. :100.00
company_location company_size
Length:16494 Length:16494
Class :character Class :character
Mode :character Mode :character
# Remove the X column from data_3 (it runs 0..741 in the summary above),
# then summarise the remaining columns.
data_3 <- select(data_3, -X)
summary(data_3)
Job.Title Salary.Estimate Job.Description
Length:742 Length:742 Length:742
Class :character Class :character Class :character
Mode :character Mode :character Mode :character
Rating Company.Name Location
Min. :-1.000 Length:742 Length:742
1st Qu.: 3.300 Class :character Class :character
Median : 3.700 Mode :character Mode :character
Mean : 3.619
3rd Qu.: 4.000
Max. : 5.000
Headquarters Size Founded
Length:742 Length:742 Min. : -1
Class :character Class :character 1st Qu.:1939
Mode :character Mode :character Median :1988
Mean :1837
3rd Qu.:2007
Max. :2019
Type.of.ownership Industry Sector
Length:742 Length:742 Length:742
Class :character Class :character Class :character
Mode :character Mode :character Mode :character
Revenue Competitors hourly
Length:742 Length:742 Min. :0.00000
Class :character Class :character 1st Qu.:0.00000
Mode :character Mode :character Median :0.00000
Mean :0.03234
3rd Qu.:0.00000
Max. :1.00000
employer_provided min_salary max_salary
Min. :0.00000 Min. : 15.00 Min. : 16.0
1st Qu.:0.00000 1st Qu.: 52.00 1st Qu.: 96.0
Median :0.00000 Median : 69.50 Median :124.0
Mean :0.02291 Mean : 74.72 Mean :128.1
3rd Qu.:0.00000 3rd Qu.: 91.00 3rd Qu.:155.0
Max. :1.00000 Max. :202.00 Max. :306.0
avg_salary company_txt job_state
Min. : 13.5 Length:742 Length:742
1st Qu.: 73.5 Class :character Class :character
Median : 97.5 Mode :character Mode :character
Mean :100.6
3rd Qu.:122.5
Max. :254.0
same_state age python_yn
Min. :0.000 Min. : -1.00 Min. :0.0000
1st Qu.:0.000 1st Qu.: 11.00 1st Qu.:0.0000
Median :1.000 Median : 24.00 Median :1.0000
Mean :0.558 Mean : 46.59 Mean :0.5283
3rd Qu.:1.000 3rd Qu.: 59.00 3rd Qu.:1.0000
Max. :1.000 Max. :276.00 Max. :1.0000
R_yn spark aws
Min. :0.000000 Min. :0.0000 Min. :0.0000
1st Qu.:0.000000 1st Qu.:0.0000 1st Qu.:0.0000
Median :0.000000 Median :0.0000 Median :0.0000
Mean :0.002695 Mean :0.2251 Mean :0.2372
3rd Qu.:0.000000 3rd Qu.:0.0000 3rd Qu.:0.0000
Max. :1.000000 Max. :1.0000 Max. :1.0000
excel job_simp seniority
Min. :0.0000 Length:742 Length:742
1st Qu.:0.0000 Class :character Class :character
Median :1.0000 Mode :character Mode :character
Mean :0.5229
3rd Qu.:1.0000
Max. :1.0000
desc_len num_comp
Min. : 407 Min. :0.000
1st Qu.: 2801 1st Qu.:0.000
Median : 3731 Median :0.000
Mean : 3870 Mean :1.054
3rd Qu.: 4740 3rd Qu.:3.000
Max. :10051 Max. :4.000
# number of distinct companies in data_1
length(unique(data_1$company_txt))
[1] 343
# number of distinct industries in data_1
length(unique(data_1$Industry))
[1] 60
# number of distinct sectors in data_1
length(unique(data_1$Sector))
[1] 25
# number of distinct job titles in data_1
length(unique(data_1$Job.Title))
[1] 264
# number of distinct states in data_1
length(unique(data_1$job_state))
[1] 38
# Heat map of the pairwise correlations between numeric variables of data_1.
# Keep the numeric columns only, then leave out the columns that are not
# wanted in the heat map (any_of() tolerates names that are absent).
data_numeric <- data_1 %>%
  select(where(is.numeric)) %>%
  select(-any_of(c("Founded", "hourly", "employer_provided", "same_state", "age")))

# Correlation matrix, reshaped to long form: one row per (Var1, Var2) pair.
cor_matrix <- cor(data_numeric)
cor_melted <- melt(
  cor_matrix,
  varnames = c("Var1", "Var2"),
  value.name = "Correlation"
)

ggplot(data = cor_melted, aes(x = Var1, y = Var2, fill = Correlation)) +
  geom_tile() +
  labs(
    title = "Correlation Heatmap",
    x = "Variable",
    y = "Variable"
  ) +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, size = 12, hjust = 1)) +
  # `limits` is spelled out in full instead of relying on partial matching
  # of the abbreviated `limit`.
  scale_fill_gradient2(
    low = "blue", high = "red", mid = "white",
    midpoint = 0, limits = c(-1, 1), space = "Lab",
    name = "Correlation"
  ) +
  # Print the rounded coefficient inside every tile.
  geom_text(aes(label = round(Correlation, 2)), color = "black", size = 2)

# Postings in data_1 whose skill flag equals 1, one subset per skill.
python_jobs <- data_1[data_1$python_yn == 1, ]
r_jobs <- data_1[data_1$R_yn == 1, ]
spark_jobs <- data_1[data_1$spark == 1, ]
aws_jobs <- data_1[data_1$aws == 1, ]
excel_jobs <- data_1[data_1$excel == 1, ]

# Share of all postings (in percent) that fall into each subset.
rate_python <- nrow(python_jobs) / nrow(data_1) * 100
rate_r <- nrow(r_jobs) / nrow(data_1) * 100
rate_spark <- nrow(spark_jobs) / nrow(data_1) * 100
rate_aws <- nrow(aws_jobs) / nrow(data_1) * 100
rate_excel <- nrow(excel_jobs) / nrow(data_1) * 100

skills_data <- data.frame(
  skill = c("Python", "R", "Spark", "AWS", "Excel"),
  rate = c(rate_python, rate_r, rate_spark, rate_aws, rate_excel)
)

# One bar per skill, labelled with its percentage at mid-height.
ggplot(skills_data, aes(x = skill, y = rate, fill = skill)) +
  geom_col() +
  theme_light() +
  labs(title = "Proportion of Skills Required", x = "Skill", y = "Rate (%)") +
  geom_text(
    aes(label = paste0(round(rate, 1), "%")),
    position = position_stack(vjust = 0.5),
    size = 3
  )

# Multiple skills vs salary: count, for every posting, how many of the five
# skill flags (python_yn, R_yn, spark, aws, excel) are equal to 1.
#
# Args:
#   data: data frame containing the five skill-flag columns.
#   skill_count: not used by the function; kept (and made optional) only so
#     that existing calls with or without a second argument keep working.
#
# Returns:
#   `data` as an ungrouped tibble with an added integer column `skill_count`.
#   The count is NA for a row in which any of the flags is NA.
filter_skills <- function(data, skill_count = NULL) {
  data %>%
    ungroup() %>%
    as_tibble() %>%
    mutate(
      # Comparing the selected columns with 1 gives a logical matrix;
      # rowSums() counts the TRUE cells of all rows in one vectorised call.
      skill_count = as.integer(
        rowSums(across(c(python_yn, R_yn, spark, aws, excel)) == 1)
      )
    )
}
# Add the per-posting skill count to data_1. The range condition keeps
# counts from 0 to 5 and drops rows whose count is NA.
data_skills <- filter_skills(data_1) %>%
  filter(skill_count >= 0 & skill_count <= 5)

# Box plot of the average salary for each number of skills.
# The title is a fixed string; it must not depend on a loop variable that
# does not exist at this point of the script.
ggplot(data_skills, aes(x = factor(skill_count), y = avg_salary)) +
  geom_boxplot() +
  theme_minimal() +
  labs(
    title = "Number of skills vs Avg Salary",
    x = "Number of Skills",
    y = "Average Salary(thousands)"
  ) +
  theme(plot.title = element_text(hjust = 0.5))

NA
NA
# Modelling data: the response (avg_salary), the numeric Rating, and nine
# predictors that are converted to factors (everything except those two).
data_processed <- data_3 %>%
  select(
    Rating, avg_salary, job_simp, job_state,
    python_yn, R_yn, spark, aws, excel,
    Industry, Revenue
  ) %>%
  mutate(across(-c(Rating, avg_salary), as.factor))

# Linear model of the average salary on all selected predictors.
model <- lm(
  avg_salary ~ Rating + job_simp + job_state + python_yn + R_yn + spark +
    aws + excel + Industry + Revenue,
  data = data_processed
)
summary(model)
Call:
lm(formula = avg_salary ~ Rating + job_simp + job_state + python_yn +
R_yn + spark + aws + excel + Industry + Revenue, data = data_processed)
Residuals:
Min 1Q Median 3Q Max
-107.62 -14.95 0.00 13.17 133.26
Coefficients: (1 not defined because of singularities)
Estimate
(Intercept) 98.0694
Rating 2.5376
job_simpdata engineer 33.8054
job_simpdata scientist 40.9660
job_simpdirector 88.5658
job_simpmanager 7.0144
job_simpmle 55.9473
job_simpna 16.8178
job_stateAZ -2.0590
job_stateCA 30.1917
job_stateCO -12.9093
job_stateCT -34.6073
job_stateDC 16.4993
job_stateDE -27.2004
job_stateFL -12.6928
job_stateGA -16.7533
job_stateIA 8.1254
job_stateID -25.2666
job_stateIL 13.0808
job_stateIN -2.1185
job_stateKS -75.8209
job_stateKY 11.7685
job_stateLA -28.1624
job_stateMA 4.3689
job_stateMD 6.8498
job_stateMI 11.6912
job_stateMN 8.1189
job_stateMO 12.3346
job_stateNC 6.0081
job_stateNE -23.1371
job_stateNJ 7.5742
job_stateNM -42.9483
job_stateNY 8.3923
job_stateOH 3.3154
job_stateOR 0.9058
job_statePA 8.3758
job_stateRI 39.5152
job_stateSC 37.8292
job_stateTN -3.7972
job_stateTX -6.4142
job_stateUT 6.3697
job_stateVA 2.4295
job_stateWA 12.7499
job_stateWI 1.5595
python_yn1 6.2350
R_yn1 8.0145
spark1 -3.8294
aws1 5.4445
excel1 0.7815
IndustryAccounting -27.3787
IndustryAdvertising & Marketing -41.2832
IndustryAerospace & Defense -43.9131
IndustryArchitectural & Engineering Services -81.8475
IndustryAuctions & Galleries -27.1965
IndustryBanks & Credit Unions -60.5123
IndustryBeauty & Personal Accessories Stores -39.1759
IndustryBiotech & Pharmaceuticals -28.3646
IndustryBrokerage Services 2.7746
IndustryColleges & Universities -39.0168
IndustryComputer Hardware & Software -34.8746
IndustryConstruction -91.9622
IndustryConsulting -37.4573
IndustryConsumer Product Rental -63.1633
IndustryConsumer Products Manufacturing -38.2034
IndustryDepartment, Clothing, & Shoe Stores -31.0168
IndustryEducation Training Services -73.6498
IndustryEnergy -70.7289
IndustryEnterprise Software & Network Solutions -28.6853
IndustryFarm Support Services -58.5843
IndustryFederal Agencies -55.4564
IndustryFinancial Analytics & Research -22.3137
IndustryFinancial Transaction Processing -54.3022
IndustryFood & Beverage Manufacturing -63.9363
IndustryGambling -75.1210
IndustryGas Stations -46.8705
IndustryHealth Care Products Manufacturing -39.1578
IndustryHealth Care Services & Hospitals -54.7651
IndustryHealth, Beauty, & Fitness -72.5956
IndustryIndustrial Manufacturing -73.8454
IndustryInsurance Agencies & Brokerages -43.0114
IndustryInsurance Carriers -41.7075
IndustryInternet -28.0805
IndustryInvestment Banking & Asset Management -15.8407
IndustryIT Services -36.7093
IndustryK-12 Education -58.2762
IndustryLending -44.5079
IndustryLogistics & Supply Chain -60.2426
IndustryMetals Brokers -48.2462
IndustryMining -61.7283
IndustryMotion Picture Production & Distribution 6.5835
IndustryOther Retail Stores -19.0592
IndustryReal Estate -46.9169
IndustryReligious Organizations -46.5904
IndustryResearch & Development -58.3445
IndustrySecurity Services -26.1471
IndustrySocial Assistance -59.5021
IndustrySporting Goods Stores -77.7117
IndustryStaffing & Outsourcing -64.1122
IndustryStock Exchanges NA
IndustryTelecommunications Manufacturing -40.4989
IndustryTelecommunications Services -43.6140
IndustryTransportation Equipment Manufacturing -62.6550
IndustryTransportation Management -36.4182
IndustryTravel Agencies -52.9995
IndustryTrucking -23.3470
IndustryTV Broadcast & Cable Networks -32.7397
IndustryVideo Games -66.2078
IndustryWholesale -59.0957
Revenue$1 to $2 billion (USD) -1.7843
Revenue$1 to $5 million (USD) -8.9855
Revenue$10 to $25 million (USD) -16.0173
Revenue$10+ billion (USD) 2.9453
Revenue$100 to $500 million (USD) -14.5568
Revenue$2 to $5 billion (USD) -9.6651
Revenue$25 to $50 million (USD) -21.5341
Revenue$5 to $10 billion (USD) -0.5771
Revenue$5 to $10 million (USD) 27.7521
Revenue$50 to $100 million (USD) -9.5054
Revenue$500 million to $1 billion (USD) -7.6013
RevenueLess than $1 million (USD) -40.9042
RevenueUnknown / Non-Applicable -9.9493
Std. Error
(Intercept) 29.7424
Rating 2.0691
job_simpdata engineer 4.8421
job_simpdata scientist 4.1544
job_simpdirector 9.0189
job_simpmanager 7.4187
job_simpmle 8.0563
job_simpna 4.5546
job_stateAZ 14.8357
job_stateCA 11.7112
job_stateCO 14.1865
job_stateCT 17.1992
job_stateDC 14.2495
job_stateDE 17.0202
job_stateFL 13.9595
job_stateGA 16.0018
job_stateIA 17.7771
job_stateID 27.6969
job_stateIL 12.7844
job_stateIN 14.6888
job_stateKS 25.5462
job_stateKY 18.2380
job_stateLA 18.1018
job_stateMA 12.0118
job_stateMD 12.0598
job_stateMI 17.9264
job_stateMN 22.7009
job_stateMO 15.3389
job_stateNC 13.6688
job_stateNE 18.9118
job_stateNJ 13.7564
job_stateNM 19.5681
job_stateNY 12.0465
job_stateOH 14.3513
job_stateOR 18.6413
job_statePA 13.3573
job_stateRI 29.8500
job_stateSC 32.1668
job_stateTN 15.0934
job_stateTX 13.0192
job_stateUT 20.5097
job_stateVA 11.9218
job_stateWA 14.5571
job_stateWI 15.8276
python_yn1 2.7870
R_yn1 25.9298
spark1 3.2545
aws1 2.9803
excel1 2.4449
IndustryAccounting 32.9108
IndustryAdvertising & Marketing 14.6378
IndustryAerospace & Defense 15.6051
IndustryArchitectural & Engineering Services 22.1421
IndustryAuctions & Galleries 30.7326
IndustryBanks & Credit Unions 16.1836
IndustryBeauty & Personal Accessories Stores 29.7001
IndustryBiotech & Pharmaceuticals 13.3360
IndustryBrokerage Services 24.1069
IndustryColleges & Universities 15.1475
IndustryComputer Hardware & Software 15.0430
IndustryConstruction 19.7900
IndustryConsulting 15.3841
IndustryConsumer Product Rental 21.2158
IndustryConsumer Products Manufacturing 15.4945
IndustryDepartment, Clothing, & Shoe Stores 17.5149
IndustryEducation Training Services 21.4387
IndustryEnergy 18.2461
IndustryEnterprise Software & Network Solutions 14.6995
IndustryFarm Support Services 32.8583
IndustryFederal Agencies 16.0560
IndustryFinancial Analytics & Research 17.9261
IndustryFinancial Transaction Processing 20.2709
IndustryFood & Beverage Manufacturing 16.8874
IndustryGambling 20.2564
IndustryGas Stations 21.9788
IndustryHealth Care Products Manufacturing 31.3432
IndustryHealth Care Services & Hospitals 14.1671
IndustryHealth, Beauty, & Fitness 32.3617
IndustryIndustrial Manufacturing 19.8789
IndustryInsurance Agencies & Brokerages 17.9046
IndustryInsurance Carriers 13.8519
IndustryInternet 15.5202
IndustryInvestment Banking & Asset Management 19.4566
IndustryIT Services 14.2034
IndustryK-12 Education 20.0433
IndustryLending 17.8319
IndustryLogistics & Supply Chain 21.1218
IndustryMetals Brokers 25.6368
IndustryMining 31.2623
IndustryMotion Picture Production & Distribution 31.0956
IndustryOther Retail Stores 30.9008
IndustryReal Estate 17.8614
IndustryReligious Organizations 23.9757
IndustryResearch & Development 15.2970
IndustrySecurity Services 19.3169
IndustrySocial Assistance 21.7173
IndustrySporting Goods Stores 25.8427
IndustryStaffing & Outsourcing 19.2225
IndustryStock Exchanges NA
IndustryTelecommunications Manufacturing 25.5230
IndustryTelecommunications Services 20.9031
IndustryTransportation Equipment Manufacturing 34.0424
IndustryTransportation Management 22.0203
IndustryTravel Agencies 18.0637
IndustryTrucking 31.8976
IndustryTV Broadcast & Cable Networks 24.2450
IndustryVideo Games 23.1375
IndustryWholesale 21.4668
Revenue$1 to $2 billion (USD) 28.8346
Revenue$1 to $5 million (USD) 31.0939
Revenue$10 to $25 million (USD) 29.0204
Revenue$10+ billion (USD) 28.7066
Revenue$100 to $500 million (USD) 28.7133
Revenue$2 to $5 billion (USD) 29.1895
Revenue$25 to $50 million (USD) 28.9459
Revenue$5 to $10 billion (USD) 29.5280
Revenue$5 to $10 million (USD) 29.6584
Revenue$50 to $100 million (USD) 28.9581
Revenue$500 million to $1 billion (USD) 29.0157
RevenueLess than $1 million (USD) 32.1823
RevenueUnknown / Non-Applicable 28.4257
t value
(Intercept) 3.297
Rating 1.226
job_simpdata engineer 6.982
job_simpdata scientist 9.861
job_simpdirector 9.820
job_simpmanager 0.946
job_simpmle 6.945
job_simpna 3.692
job_stateAZ -0.139
job_stateCA 2.578
job_stateCO -0.910
job_stateCT -2.012
job_stateDC 1.158
job_stateDE -1.598
job_stateFL -0.909
job_stateGA -1.047
job_stateIA 0.457
job_stateID -0.912
job_stateIL 1.023
job_stateIN -0.144
job_stateKS -2.968
job_stateKY 0.645
job_stateLA -1.556
job_stateMA 0.364
job_stateMD 0.568
job_stateMI 0.652
job_stateMN 0.358
job_stateMO 0.804
job_stateNC 0.440
job_stateNE -1.223
job_stateNJ 0.551
job_stateNM -2.195
job_stateNY 0.697
job_stateOH 0.231
job_stateOR 0.049
job_statePA 0.627
job_stateRI 1.324
job_stateSC 1.176
job_stateTN -0.252
job_stateTX -0.493
job_stateUT 0.311
job_stateVA 0.204
job_stateWA 0.876
job_stateWI 0.099
python_yn1 2.237
R_yn1 0.309
spark1 -1.177
aws1 1.827
excel1 0.320
IndustryAccounting -0.832
IndustryAdvertising & Marketing -2.820
IndustryAerospace & Defense -2.814
IndustryArchitectural & Engineering Services -3.696
IndustryAuctions & Galleries -0.885
IndustryBanks & Credit Unions -3.739
IndustryBeauty & Personal Accessories Stores -1.319
IndustryBiotech & Pharmaceuticals -2.127
IndustryBrokerage Services 0.115
IndustryColleges & Universities -2.576
IndustryComputer Hardware & Software -2.318
IndustryConstruction -4.647
IndustryConsulting -2.435
IndustryConsumer Product Rental -2.977
IndustryConsumer Products Manufacturing -2.466
IndustryDepartment, Clothing, & Shoe Stores -1.771
IndustryEducation Training Services -3.435
IndustryEnergy -3.876
IndustryEnterprise Software & Network Solutions -1.951
IndustryFarm Support Services -1.783
IndustryFederal Agencies -3.454
IndustryFinancial Analytics & Research -1.245
IndustryFinancial Transaction Processing -2.679
IndustryFood & Beverage Manufacturing -3.786
IndustryGambling -3.709
IndustryGas Stations -2.133
IndustryHealth Care Products Manufacturing -1.249
IndustryHealth Care Services & Hospitals -3.866
IndustryHealth, Beauty, & Fitness -2.243
IndustryIndustrial Manufacturing -3.715
IndustryInsurance Agencies & Brokerages -2.402
IndustryInsurance Carriers -3.011
IndustryInternet -1.809
IndustryInvestment Banking & Asset Management -0.814
IndustryIT Services -2.585
IndustryK-12 Education -2.908
IndustryLending -2.496
IndustryLogistics & Supply Chain -2.852
IndustryMetals Brokers -1.882
IndustryMining -1.975
IndustryMotion Picture Production & Distribution 0.212
IndustryOther Retail Stores -0.617
IndustryReal Estate -2.627
IndustryReligious Organizations -1.943
IndustryResearch & Development -3.814
IndustrySecurity Services -1.354
IndustrySocial Assistance -2.740
IndustrySporting Goods Stores -3.007
IndustryStaffing & Outsourcing -3.335
IndustryStock Exchanges NA
IndustryTelecommunications Manufacturing -1.587
IndustryTelecommunications Services -2.086
IndustryTransportation Equipment Manufacturing -1.840
IndustryTransportation Management -1.654
IndustryTravel Agencies -2.934
IndustryTrucking -0.732
IndustryTV Broadcast & Cable Networks -1.350
IndustryVideo Games -2.861
IndustryWholesale -2.753
Revenue$1 to $2 billion (USD) -0.062
Revenue$1 to $5 million (USD) -0.289
Revenue$10 to $25 million (USD) -0.552
Revenue$10+ billion (USD) 0.103
Revenue$100 to $500 million (USD) -0.507
Revenue$2 to $5 billion (USD) -0.331
Revenue$25 to $50 million (USD) -0.744
Revenue$5 to $10 billion (USD) -0.020
Revenue$5 to $10 million (USD) 0.936
Revenue$50 to $100 million (USD) -0.328
Revenue$500 million to $1 billion (USD) -0.262
RevenueLess than $1 million (USD) -1.271
RevenueUnknown / Non-Applicable -0.350
Pr(>|t|)
(Intercept) 0.001032
Rating 0.220516
job_simpdata engineer 7.51e-12
job_simpdata scientist < 2e-16
job_simpdirector < 2e-16
job_simpmanager 0.344770
job_simpmle 9.59e-12
job_simpna 0.000242
job_stateAZ 0.889664
job_stateCA 0.010166
job_stateCO 0.363191
job_stateCT 0.044635
job_stateDC 0.247356
job_stateDE 0.110523
job_stateFL 0.363566
job_stateGA 0.295524
job_stateIA 0.647779
job_stateID 0.361990
job_stateIL 0.306617
job_stateIN 0.885371
job_stateKS 0.003113
job_stateKY 0.518987
job_stateLA 0.120269
job_stateMA 0.716192
job_stateMD 0.570246
job_stateMI 0.514528
job_stateMN 0.720728
job_stateMO 0.421624
job_stateNC 0.660418
job_stateNE 0.221636
job_stateNJ 0.582109
job_stateNM 0.028546
job_stateNY 0.486279
job_stateOH 0.817376
job_stateOR 0.961262
job_statePA 0.530851
job_stateRI 0.186058
job_stateSC 0.240032
job_stateTN 0.801447
job_stateTX 0.622418
job_stateUT 0.756233
job_stateVA 0.838590
job_stateWA 0.381448
job_stateWI 0.921542
python_yn1 0.025629
R_yn1 0.757360
spark1 0.239795
aws1 0.068208
excel1 0.749334
IndustryAccounting 0.405781
IndustryAdvertising & Marketing 0.004951
IndustryAerospace & Defense 0.005047
IndustryArchitectural & Engineering Services 0.000238
IndustryAuctions & Galleries 0.376530
IndustryBanks & Credit Unions 0.000202
IndustryBeauty & Personal Accessories Stores 0.187638
IndustryBiotech & Pharmaceuticals 0.033819
IndustryBrokerage Services 0.908406
IndustryColleges & Universities 0.010231
IndustryComputer Hardware & Software 0.020754
IndustryConstruction 4.12e-06
IndustryConsulting 0.015180
IndustryConsumer Product Rental 0.003022
IndustryConsumer Products Manufacturing 0.013947
IndustryDepartment, Clothing, & Shoe Stores 0.077070
IndustryEducation Training Services 0.000631
IndustryEnergy 0.000117
IndustryEnterprise Software & Network Solutions 0.051453
IndustryFarm Support Services 0.075084
IndustryFederal Agencies 0.000590
IndustryFinancial Analytics & Research 0.213688
IndustryFinancial Transaction Processing 0.007583
IndustryFood & Beverage Manufacturing 0.000168
IndustryGambling 0.000227
IndustryGas Stations 0.033354
IndustryHealth Care Products Manufacturing 0.212017
IndustryHealth Care Services & Hospitals 0.000122
IndustryHealth, Beauty, & Fitness 0.025232
IndustryIndustrial Manufacturing 0.000222
IndustryInsurance Agencies & Brokerages 0.016586
IndustryInsurance Carriers 0.002710
IndustryInternet 0.070889
IndustryInvestment Banking & Asset Management 0.415867
IndustryIT Services 0.009978
IndustryK-12 Education 0.003773
IndustryLending 0.012819
IndustryLogistics & Supply Chain 0.004487
IndustryMetals Brokers 0.060314
IndustryMining 0.048764
IndustryMotion Picture Production & Distribution 0.832395
IndustryOther Retail Stores 0.537602
IndustryReal Estate 0.008834
IndustryReligious Organizations 0.052439
IndustryResearch & Development 0.000150
IndustrySecurity Services 0.176360
IndustrySocial Assistance 0.006323
IndustrySporting Goods Stores 0.002744
IndustryStaffing & Outsourcing 0.000903
IndustryStock Exchanges NA
IndustryTelecommunications Manufacturing 0.113075
IndustryTelecommunications Services 0.037342
IndustryTransportation Equipment Manufacturing 0.066172
IndustryTransportation Management 0.098663
IndustryTravel Agencies 0.003469
IndustryTrucking 0.464484
IndustryTV Broadcast & Cable Networks 0.177390
IndustryVideo Games 0.004358
IndustryWholesale 0.006080
Revenue$1 to $2 billion (USD) 0.950678
Revenue$1 to $5 million (USD) 0.772694
Revenue$10 to $25 million (USD) 0.581193
Revenue$10+ billion (USD) 0.918313
Revenue$100 to $500 million (USD) 0.612355
Revenue$2 to $5 billion (USD) 0.740668
Revenue$25 to $50 million (USD) 0.457192
Revenue$5 to $10 billion (USD) 0.984412
Revenue$5 to $10 million (USD) 0.349778
Revenue$50 to $100 million (USD) 0.742836
Revenue$500 million to $1 billion (USD) 0.793431
RevenueLess than $1 million (USD) 0.204199
RevenueUnknown / Non-Applicable 0.726449
(Intercept) **
Rating
job_simpdata engineer ***
job_simpdata scientist ***
job_simpdirector ***
job_simpmanager
job_simpmle ***
job_simpna ***
job_stateAZ
job_stateCA *
job_stateCO
job_stateCT *
job_stateDC
job_stateDE
job_stateFL
job_stateGA
job_stateIA
job_stateID
job_stateIL
job_stateIN
job_stateKS **
job_stateKY
job_stateLA
job_stateMA
job_stateMD
job_stateMI
job_stateMN
job_stateMO
job_stateNC
job_stateNE
job_stateNJ
job_stateNM *
job_stateNY
job_stateOH
job_stateOR
job_statePA
job_stateRI
job_stateSC
job_stateTN
job_stateTX
job_stateUT
job_stateVA
job_stateWA
job_stateWI
python_yn1 *
R_yn1
spark1
aws1 .
excel1
IndustryAccounting
IndustryAdvertising & Marketing **
IndustryAerospace & Defense **
IndustryArchitectural & Engineering Services ***
IndustryAuctions & Galleries
IndustryBanks & Credit Unions ***
IndustryBeauty & Personal Accessories Stores
IndustryBiotech & Pharmaceuticals *
IndustryBrokerage Services
IndustryColleges & Universities *
IndustryComputer Hardware & Software *
IndustryConstruction ***
IndustryConsulting *
IndustryConsumer Product Rental **
IndustryConsumer Products Manufacturing *
IndustryDepartment, Clothing, & Shoe Stores .
IndustryEducation Training Services ***
IndustryEnergy ***
IndustryEnterprise Software & Network Solutions .
IndustryFarm Support Services .
IndustryFederal Agencies ***
IndustryFinancial Analytics & Research
IndustryFinancial Transaction Processing **
IndustryFood & Beverage Manufacturing ***
IndustryGambling ***
IndustryGas Stations *
IndustryHealth Care Products Manufacturing
IndustryHealth Care Services & Hospitals ***
IndustryHealth, Beauty, & Fitness *
IndustryIndustrial Manufacturing ***
IndustryInsurance Agencies & Brokerages *
IndustryInsurance Carriers **
IndustryInternet .
IndustryInvestment Banking & Asset Management
IndustryIT Services **
IndustryK-12 Education **
IndustryLending *
IndustryLogistics & Supply Chain **
IndustryMetals Brokers .
IndustryMining *
IndustryMotion Picture Production & Distribution
IndustryOther Retail Stores
IndustryReal Estate **
IndustryReligious Organizations .
IndustryResearch & Development ***
IndustrySecurity Services
IndustrySocial Assistance **
IndustrySporting Goods Stores **
IndustryStaffing & Outsourcing ***
IndustryStock Exchanges
IndustryTelecommunications Manufacturing
IndustryTelecommunications Services *
IndustryTransportation Equipment Manufacturing .
IndustryTransportation Management .
IndustryTravel Agencies **
IndustryTrucking
IndustryTV Broadcast & Cable Networks
IndustryVideo Games **
IndustryWholesale **
Revenue$1 to $2 billion (USD)
Revenue$1 to $5 million (USD)
Revenue$10 to $25 million (USD)
Revenue$10+ billion (USD)
Revenue$100 to $500 million (USD)
Revenue$2 to $5 billion (USD)
Revenue$25 to $50 million (USD)
Revenue$5 to $10 billion (USD)
Revenue$5 to $10 million (USD)
Revenue$50 to $100 million (USD)
Revenue$500 million to $1 billion (USD)
RevenueLess than $1 million (USD)
RevenueUnknown / Non-Applicable
---
Signif. codes:
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 26.86 on 622 degrees of freedom
Multiple R-squared: 0.599, Adjusted R-squared: 0.5223
F-statistic: 7.807 on 119 and 622 DF, p-value: < 2.2e-16
# Residuals vs Fitted: the fitted lm object is passed to ggplot() directly;
# points plus a smoother drawn without a confidence band.
ggplot(data = model, mapping = aes(x = .fitted, y = .resid)) +
  geom_point() +
  geom_smooth(se = FALSE) +
  theme_minimal() +
  labs(
    title = "Residuals vs Fitted",
    x = "Fitted values",
    y = "Residuals"
  )
`geom_smooth()` using method = 'loess' and formula = 'y ~
x'

# Normal Q-Q: standardized residuals of `model` against normal quantiles,
# with the reference line.
ggplot(data = model, mapping = aes(sample = .stdresid)) +
  stat_qq() +
  stat_qq_line() +
  theme_minimal() +
  labs(
    title = "Normal Q-Q",
    x = "Theoretical Quantiles",
    y = "Standardized Residuals"
  )
Warning: Removed 13 rows containing non-finite outside the scale
range (`stat_qq()`).
Warning: Removed 13 rows containing non-finite outside the scale
range (`stat_qq_line()`).

# Scale-Location (Spread-Location): square root of the absolute standardized
# residuals against the fitted values, with a smoother and no band.
ggplot(data = model, mapping = aes(x = .fitted, y = sqrt(abs(.stdresid)))) +
  geom_point() +
  geom_smooth(se = FALSE) +
  theme_minimal() +
  labs(
    title = "Scale-Location",
    x = "Fitted values",
    y = "Square Root of Standardized Residuals"
  )
`geom_smooth()` using method = 'loess' and formula = 'y ~
x'
Warning: Removed 13 rows containing non-finite outside the scale
range (`stat_smooth()`).
Warning: Removed 13 rows containing missing values or values outside
the scale range (`geom_point()`).

# Residuals vs Leverage: standardized residuals against the hat values of
# `model`, with a smoother and no confidence band.
ggplot(data = model, mapping = aes(x = .hat, y = .stdresid)) +
  geom_point() +
  geom_smooth(se = FALSE) +
  theme_minimal() +
  labs(
    title = "Residuals vs Leverage",
    x = "Leverage",
    y = "Standardized Residuals"
  )
`geom_smooth()` using method = 'loess' and formula = 'y ~
x'
Warning: Removed 13 rows containing non-finite outside the scale
range (`stat_smooth()`).
Warning: Removed 13 rows containing missing values or values outside
the scale range (`geom_point()`).

# Rating vs average salary: scatter plot with a straight-line (lm) fit.
ggplot(data = data_processed, mapping = aes(x = Rating, y = avg_salary)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE, color = "blue") +
  theme_minimal() +
  labs(
    title = "Effect of Rating on Avg Salary",
    x = "Rating",
    y = "Average Salary"
  )
`geom_smooth()` using formula = 'y ~ x'

# Job title vs salary: one box per simplified job title, labels tilted 45 degrees.
ggplot(data = data_processed, mapping = aes(x = job_simp, y = avg_salary)) +
  geom_boxplot() +
  labs(
    title = "Effect of Job Title on Salary",
    x = "Job Title",
    y = "Average Salary"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Job state vs salary: one box per state, labels tilted 60 degrees.
ggplot(data = data_processed, mapping = aes(x = job_state, y = avg_salary)) +
  geom_boxplot() +
  labs(
    title = "Effect of Job State on Avg Salary",
    x = "Job State",
    y = "Average Salary"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 60, hjust = 1))

NA
NA
NA
# Regression plot of the average salary on the number of skills.
# Uses data_skills, the data frame built earlier in this script that carries
# the skill_count column; no object named `filtered_data` is created here.
# NOTE(review): confirm data_skills is the intended input for this plot.
ggplot(data_skills, aes(x = skill_count, y = avg_salary)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE, color = "blue") +
  labs(
    title = "Number of Skills vs Avg Salary",
    x = "Number of Skills",
    y = "Average Salary"
  ) +
  theme_minimal()
`geom_smooth()` using formula = 'y ~ x'

# Frequency of each seniority label in data_3.
table(data_3$seniority)
jr na senior
2 520 220
# Group by industry and seniority to find the average salary for each
# seniority level within every industry.
# `.groups = "drop"` returns an ungrouped result directly, which replaces the
# trailing ungroup() and silences the "summarise() has grouped output" message.
industry_salary <- data_3 %>%
  group_by(Industry, seniority) %>%
  summarize(avg_salary = mean(avg_salary, na.rm = TRUE), .groups = "drop")
# Ten industry rows with the highest mean salary among senior positions
top_senior <- industry_salary %>%
  filter(seniority %in% "senior") %>%
  arrange(desc(avg_salary)) %>%
  head(10)
# Ten industry rows with the highest mean salary among junior ("jr") or
# unspecified ("na") positions
top_non_senior <- industry_salary %>%
  filter(seniority %in% c("jr", "na")) %>%
  arrange(desc(avg_salary)) %>%
  head(10)
# Side-by-side bars of mean salary per industry, coloured by seniority.
# Only rows 3 to 18 of industry_salary are drawn.
# NOTE(review): the 3:18 window looks like a hand-picked subset -- confirm.
ggplot(
  slice(industry_salary, 3:18),
  aes(Industry, avg_salary, fill = seniority)
) +
  geom_col(position = "dodge") +
  labs(
    title = "Salary by Industry and Seniority",
    x = "Industry",
    y = "Average Salary"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# One entry in job_state holds " Los Angeles" instead of the state code, so
# map it (and a bare "CA") to " CA"; the other codes in this column carry the
# same leading space.
data_1 <- data_1 %>%
  mutate(
    job_state = replace(
      job_state,
      job_state %in% c(" Los Angeles", "CA"),
      " CA"
    )
  )
# check
unique(data_1[["job_state"]])
[1] " NM" " MD" " FL" " WA" " NY" " TX" " CA" " VA" " MA"
[10] " NJ" " CO" " IL" " KY" " OR" " CT" " MI" " DC" " OH"
[19] " AL" " MO" " PA" " GA" " IN" " LA" " WI" " NC" " AZ"
[28] " NE" " MN" " UT" " TN" " DE" " ID" " RI" " IA" " SC"
[37] " KS"
# Postings ordered from highest to lowest avg_salary (also used by the
# per-city plot that follows).
highest_salary <- data_1 %>%
  arrange(desc(avg_salary))
# plot of highest salaries per State
# Reduce to one row per state before plotting: with one row per posting,
# identity bars are stacked and each bar's height is the SUM of max_salary
# across the state's postings rather than the highest value.
highest_salary %>%
  group_by(job_state) %>%
  summarize(max_salary = max(max_salary)) %>%
  ggplot(aes(x = job_state, y = max_salary)) +
  geom_col(fill = "orange") +
  labs(title = "Highest Salaries by State", x = "State", y = "Salary") +
  theme(axis.text.x = element_text(angle = 90, hjust = 0.5))

# plot of highest salaries per City
# Take the 30 best-paid postings (highest_salary is sorted by avg_salary),
# then keep the single highest max_salary per city. Without this step, cities
# that appear several times in the top 30 get stacked bars whose height is a
# sum. The x axis shows cities, so it is labelled "City".
highest_salary %>%
  slice_head(n = 30) %>%
  group_by(Location) %>%
  summarize(max_salary = max(max_salary)) %>%
  ggplot(aes(x = Location, y = max_salary)) +
  geom_col(fill = "purple") +
  labs(title = "Highest Salaries by City", x = "City", y = "Salary") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Count postings per ownership type
table(data_3[["Type.of.ownership"]])
-1
1
College / University
13
Company - Private
410
Company - Public
193
Government
15
Hospital
15
Nonprofit Organization
55
Other Organization
3
School / School District
2
Subsidiary or Business Segment
34
Unknown
1
# Mean avg_salary for four selected ownership types
# (private, public, government, nonprofit).
filtered_data <- data_3 %>%
  filter(
    is.element(
      Type.of.ownership,
      c(
        "Company - Private", "Company - Public",
        "Government", "Nonprofit Organization"
      )
    )
  ) %>%
  group_by(Type.of.ownership) %>%
  summarize(avg_salary = mean(avg_salary))
# One bar per ownership type, coloured by type
ggplot(
  filtered_data,
  aes(Type.of.ownership, avg_salary, fill = Type.of.ownership)
) +
  geom_col() +
  labs(
    title = "Salary by Ownership Type",
    x = "Type of Ownership",
    y = "Average Salary"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Mean avg_salary per state for the 20 best-paying states; bars are ordered by
# ascending mean via reorder().
data_1 %>%
  group_by(job_state) %>%
  summarize(avg_salary = mean(avg_salary)) %>%
  arrange(desc(avg_salary)) %>%
  head(20) %>%
  ggplot(aes(reorder(job_state, avg_salary), avg_salary, fill = job_state)) +
  geom_col() +
  labs(
    title = "Average Salary by State",
    x = "State",
    y = "Average Salary"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

NA
NA
# Remove states with 1 entry or with repeating entries
state_data <- dplyr::filter(
  data_1,
  !is.element(job_state, c(" KS", " DE", " SC", " RI"))
)
# Build a violin plot of avg_salary for a single state.
#
# Args:
#   state_name: value of job_state to plot; compared with `==`, so it must
#               match the stored value exactly (codes in this data carry a
#               leading space, e.g. " CA").
#   data:       data frame with job_state and avg_salary columns. Defaults to
#               data_1, which is what the function previously read implicitly
#               from the global environment.
#
# Returns:
#   A ggplot object (not printed here).
state_salary <- function(state_name, data = data_1) {
  # Local name chosen so it does not shadow the global state_data table.
  one_state <- data %>%
    filter(job_state == state_name)
  ggplot(one_state, aes(x = job_state, y = avg_salary, fill = job_state)) +
    geom_violin() +
    labs(
      title = paste("Salary Range in", state_name),
      x = "State",
      y = "Salary"
    ) +
    theme_minimal() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))
}
# Build one violin plot per remaining state and print the resulting list
state_plots <- state_data$job_state %>%
  unique() %>%
  lapply(state_salary)
state_plots
[[1]]
[[2]]
[[3]]
[[4]]
[[5]]
[[6]]
[[7]]
[[8]]
[[9]]
[[10]]
[[11]]
[[12]]
[[13]]
[[14]]
[[15]]
[[16]]
[[17]]
[[18]]
[[19]]
[[20]]
[[21]]
[[22]]
[[23]]
[[24]]
[[25]]
[[26]]
[[27]]
[[28]]
[[29]]
[[30]]
[[31]]
[[32]]
[[33]]

































# Inspect the rows for the four states excluded above to see why their plots
# did not work
wrong_entries <- data_1 %>%
  filter(job_state %in% c(" KS", " DE", " SC", " RI"))
print(wrong_entries)
NA
# check all the unique values for Revenue
# table(data_3$Revenue)
# Revenue categories with the most entries, listed from smallest to largest
# revenue band. The order matters: it is reused as the factor level order.
revenue_categories <- c(
  "$50 to $100 million (USD)",
  "$100 to $500 million (USD)",
  "$500 million to $1 billion (USD)",
  "$1 to $2 billion (USD)",
  "$10+ billion (USD)"
)
# Keep only those categories and plot the salary distribution per band.
# Revenue is a character column, so without an explicit factor the axis and
# legend are sorted alphabetically ("$1 to $2 billion" first, "$50 to $100
# million" after "$10+ billion"); fixing the levels shows them by magnitude.
data_3 %>%
  filter(Revenue %in% revenue_categories) %>%
  mutate(Revenue = factor(Revenue, levels = revenue_categories)) %>%
  ggplot(aes(x = Revenue, y = avg_salary, fill = Revenue)) +
  geom_boxplot() +
  labs(
    title = "Salary Distribution by Company Revenue",
    x = "Company Revenue",
    y = "Average Salary"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(size = 12, face = "bold", vjust = 2),
    plot.margin = margin(5, 0, 0, 10),
    legend.text = element_text(size = 9)
  )

NA
NA
NA
# Distinct years present in data_4
unique(data_4[["work_year"]])
[1] 2024 2022 2023 2020 2021
# Count postings per company-size bucket
table(data_1[["Size"]])
-1 1 to 50 employees
1 31
10000+ employees 1001 to 5000 employees
130 150
201 to 500 employees 5001 to 10000 employees
117 76
501 to 1000 employees 51 to 200 employees
134 94
Unknown
9
# Count rows of data_4 per work year
table(data_4[["work_year"]])
2020 2021 2022 2023 2024
75 218 1655 8519 6027
# Count postings per revenue category
table(data_3[["Revenue"]])
-1
1
$1 to $2 billion (USD)
60
$1 to $5 million (USD)
8
$10 to $25 million (USD)
32
$10+ billion (USD)
124
$100 to $500 million (USD)
91
$2 to $5 billion (USD)
39
$25 to $50 million (USD)
40
$5 to $10 billion (USD)
19
$5 to $10 million (USD)
18
$50 to $100 million (USD)
46
$500 million to $1 billion (USD)
57
Less than $1 million (USD)
4
Unknown / Non-Applicable
203
# List the distinct simplified job titles in data_3
unique(data_3[["job_simp"]])
[1] "data scientist" "na"
[3] "analyst" "data engineer"
[5] "director" "manager"
[7] "mle"
# Count how many postings fall under each simplified job title
table(data_3[["job_simp"]])
analyst data engineer data scientist
102 119 279
director manager mle
14 22 22
na
184
LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKCmBgYHtyfQojIGxpYnJhcnkoc2hpbnkpCmxpYnJhcnkoZHBseXIpCmxpYnJhcnkoZ2dwbG90MikKbGlicmFyeSh0aWR5cikKbGlicmFyeShyZXNoYXBlMikgIyByZXNoYXBpbmcgdGhlIGRhdGEgCmxpYnJhcnkoY29ycnBsb3QpICMgY29ycmVsYXRpb24gbWF0cml4IApsaWJyYXJ5KGNhcikgIyBhcHBsaWVkIHJlZ3Jlc3Npb24gCgpkYXRhXzEgPC0gcmVhZC5jc3YoZmlsZSA9ICIuL3NhbGFyeV9kYXRhX2NsZWFuZWQuY3N2IikgIyAKZGF0YV8yIDwtIHJlYWQuY3N2KGZpbGUgPSAiLi9nbGFzc2Rvb3Jfam9icy5jc3YiKSAjIApkYXRhXzMgPC0gcmVhZC5jc3YoZmlsZSA9ICIuL2VkYV9kYXRhLmNzdiIpICMgIGRhdGEgc2NpZW5jZSBqb2IgcG9zdGluZ3MgZnJvbSBHbGFzc2Rvb3IuY29tIGZvciAyMDE3LTIwMTggMzMgdmFyaWFibGVzIApkYXRhXzQgPC0gcmVhZC5jc3YoZmlsZSA9ICIuL3NhbGFyaWVzXzIuY3N2IikgICMgZGF0YSBzY2llbnRpc3Qgc2FsYXJpZXMgZm9yIDIwMjQKCmBgYAoKCmBgYHtyfQpoZWFkKGRhdGFfMSwgMTApCmBgYAoKCmBgYHtyfQpoZWFkKGRhdGFfMiwgNSkKYGBgCgoKYGBge3J9CmhlYWQoZGF0YV8zLCA1KQpgYGAKCgpgYGB7cn0KaGVhZChkYXRhXzQsIDUpCmBgYApgYGB7cn0KIyBjaGVja2luZyBmb3IgbWlzc2luZyB2YWx1ZXMgaW4gZWFjaCBkYXRhZnJhbWUgCnN1bShpcy5uYShkYXRhXzEpKQpzdW0oaXMubmEoZGF0YV8yKSkKc3VtKGlzLm5hKGRhdGFfMykpCnN1bShpcy5uYShkYXRhXzQpKQpgYGAKCgpgYGB7cn0KIyBjaGVjayBpZiB0aGVyZSBhcmUgZW1wdHkgc3RyaW5ncyBhbmQgcmVwbGFjZSB0aGVtIHdpdGggTkEKIyBkcGx5cjo6bXV0YXRlX2FsbChkYXRhXzEsIGxpc3Qofm5hX2lmKC4sIiIpKSkKZGF0YV8xICU+JSBkcGx5cjo6bXV0YXRlX2lmKGlzLmNoYXJhY3RlciwgbGlzdCh+bmFfaWYoLiwiIikpKSAKZGF0YV8yICU+JSBkcGx5cjo6bXV0YXRlX2lmKGlzLmNoYXJhY3RlciwgbGlzdCh+bmFfaWYoLiwiIikpKSAKZGF0YV8zICU+JSBkcGx5cjo6bXV0YXRlX2lmKGlzLmNoYXJhY3RlciwgbGlzdCh+bmFfaWYoLiwiIikpKSAKZGF0YV80ICU+JSBkcGx5cjo6bXV0YXRlX2lmKGlzLmNoYXJhY3RlciwgbGlzdCh+bmFfaWYoLiwiIikpKSAKYGBgCmBgYHtyfQpzdW0oaXMubmEoZGF0YV8xKSkKc3VtKGlzLm5hKGRhdGFfMikpCnN1bShpcy5uYShkYXRhXzMpKQpzdW0oaXMubmEoZGF0YV80KSkKYGBgCgoKYGBge3J9CnN1bW1hcnkuZGF0YS5mcmFtZShkYXRhXzEpCgpgYGAKYGBge3J9CnN1bW1hcnkuZGF0YS5mcmFtZShkYXRhXzIpCmBgYApgYGB7cn0Kc3VtbWFyeS5kYXRhLmZyYW1lKGRhdGFfMykKYGBgCmBgYHtyfQpzdW1tYXJ5LmRhdGEuZnJhbWUoZGF0YV80KQpgYGAKYGBge3J9CiMgZHJvcHBpbmcgY29sdW1uIFggZnJvbSBkYXRhXzMKZGF0YV8zIDwtIGRhdGFfMyAlPiUgc2VsZWN0KC1YKQpzdW1t
YXJ5LmRhdGEuZnJhbWUoZGF0YV8zKQpgYGAKYGBge3J9CiMgaG93IG1hbnkgZGlmZmVyZW50IGNvbXBhbmllcyBhcmUgaW4gZGF0YV8xCmxlbmd0aCh1bmlxdWUoZGF0YV8xJGNvbXBhbnlfdHh0KSkKIyBob3cgbWFueSBkaWZmZXJlbnQgaW5kdXN0aWVzIGluIGRhdGFfMQpsZW5ndGgodW5pcXVlKGRhdGFfMSRJbmR1c3RyeSkpCiMgaG93IG1hbnkgZGlmZmVyZW50IHNlY3RvcnMgaW4gZGF0YV8xCmxlbmd0aCh1bmlxdWUoZGF0YV8xJFNlY3RvcikpCiMgaG93IG1hbnkgZGlmZmVyZW50IGpvYiB0aXRlbHMgaW4gZGF0YV8xCmxlbmd0aCh1bmlxdWUoZGF0YV8xJEpvYi5UaXRsZSkpCiMgaG93IG1hbnkgZGlmZmVyZW50IHN0YXRlcyBpbiBkYXRhXzEKbGVuZ3RoKHVuaXF1ZShkYXRhXzEkam9iX3N0YXRlKSkKYGBgCmBgYHtyfQojIGhlYXQgbWFwIHRvIHNlZSB0aGUgY29ycmVsYXRpb24gYmV0d2VlbiB2YXJpYWJsZXMgCiMgY3JlYXRlIGEgc3Vic2V0IGZvciBkYXRhXzEgd2l0aCBudW1lcmljIHZhcmlhYmxlcyBvbmx5CgpkYXRhX251bWVyaWMgPC0gc2VsZWN0X2lmKGRhdGFfMSwgaXMubnVtZXJpYykgCgpkYXRhX251bWVyaWMgPC0gZGF0YV9udW1lcmljWywgIW5hbWVzKGRhdGFfbnVtZXJpYykgJWluJSBjKCJGb3VuZGVkIiwiaG91cmx5IiwgImVtcGxveWVyX3Byb3ZpZGVkIiwgInNhbWVfc3RhdGUiLCAiYWdlIildCgojIGRhdGFfbWVsdGVkIDwtIG1lbHQoZGF0YV9udW1lcmljKQoKIyBjb3JyZWxhdGlvbiBtYXRyaXggCmNvcl9tYXRyaXggPC0gY29yKGRhdGFfbnVtZXJpYykKY29yX21lbHRlZCA8LSBtZWx0KGNvcl9tYXRyaXgsIHZhcm5hbWVzID0gYygiVmFyMSIsICJWYXIyIiksIHZhbHVlLm5hbWUgPSAiQ29ycmVsYXRpb24iKQoKZ2dwbG90KGRhdGEgPSBjb3JfbWVsdGVkLCBhZXMoeCA9IFZhcjEsIHkgPSBWYXIyLCBmaWxsID0gQ29ycmVsYXRpb24pKSArCiAgZ2VvbV90aWxlKCkgKwogIGxhYnModGl0bGUgPSAiQ29ycmVsYXRpb24gSGVhdG1hcCIsCiAgICAgICB4ID0gIlZhcmlhYmxlIiwKICAgICAgIHkgPSAiVmFyaWFibGUiKSArCiAgdGhlbWUoYXhpcy50ZXh0LnggPSBlbGVtZW50X3RleHQoYW5nbGUgPSA0NSwgdmp1c3QgPSAxLCBzaXplID0gMTIsIGhqdXN0ID0gMSkpICsKICBzY2FsZV9maWxsX2dyYWRpZW50Mihsb3cgPSAiYmx1ZSIsIGhpZ2ggPSAicmVkIiwgbWlkID0gIndoaXRlIiwgbWlkcG9pbnQgPSAwLCBsaW1pdCA9IGMoLTEsIDEpLCBzcGFjZSA9ICJMYWIiLCAKICAgICAgICAgICAgICAgICAgICAgICBuYW1lID0gIkNvcnJlbGF0aW9uIikgKwogICBnZW9tX3RleHQoYWVzKGxhYmVsID0gcm91bmQoQ29ycmVsYXRpb24sIDIpKSwgY29sb3IgPSAiYmxhY2siLCBzaXplID0gMikKYGBgCmBgYHtyfQpweXRob25fam9icyA8LSBkYXRhXzFbZGF0YV8xJHB5dGhvbl95biA9PTEsIF0KcmF0ZV9weXRob24gPSAoKG5yb3cocHl0aG9uX2pvYnMpKS9ucm93KGRhdGFfMSkpKjEwMAoKcl9qb2JzIDwtIGRhdGFfMVtk
YXRhXzEkUl95biA9PTEsIF0KcmF0ZV9yID0gKChucm93KHJfam9icykpL25yb3coZGF0YV8xKSkqMTAwCgpzcGFya19qb2JzIDwtIGRhdGFfMVtkYXRhXzEkc3BhcmsgPT0xLCBdCnJhdGVfc3BhcmsgPSAoKG5yb3coc3Bhcmtfam9icykpL25yb3coZGF0YV8xKSkqMTAwCgphd3Nfam9icyA8LSBkYXRhXzFbZGF0YV8xJGF3cyA9PTEsIF0KcmF0ZV9hd3MgPSAoKG5yb3coYXdzX2pvYnMpKS9ucm93KGRhdGFfMSkpKjEwMAoKZXhjZWxfam9icyA8LSBkYXRhXzFbZGF0YV8xJGV4Y2VsID09IDEsIF0KcmF0ZV9leGNlbCA9ICgobnJvdyhleGNlbF9qb2JzKSkvbnJvdyhkYXRhXzEpKSoxMDAKCnNraWxsc19kYXRhIDwtIGRhdGEuZnJhbWUoCiAgc2tpbGwgPSBjKCJQeXRob24iLCAiUiIsICJTcGFyayIsICJBV1MiLCAiRXhjZWwiKSwKICByYXRlID0gYyhyYXRlX3B5dGhvbiwgcmF0ZV9yLCByYXRlX3NwYXJrLCByYXRlX2F3cywgcmF0ZV9leGNlbCkKKQpnZ3Bsb3Qoc2tpbGxzX2RhdGEsIGFlcyh4ID0gc2tpbGwsIHkgPSByYXRlLCBmaWxsID0gc2tpbGwpKSArCiAgZ2VvbV9iYXIoc3RhdCA9ICJpZGVudGl0eSIpKwogIHRoZW1lX2xpZ2h0KCkrCiAgbGFicyh0aXRsZSA9ICJTa2lsbHMgUmVxdWlyZWQiLCB4ID0gIlNraWxsIiwgeSA9ICJSYXRlICglKSIpICsKICBnZW9tX3RleHQoYWVzKGxhYmVsID0gcGFzdGUwKHJvdW5kKHJhdGUsIDEpLCAiJSIpKSwgCiAgICAgICAgICAgICBwb3NpdGlvbiA9IHBvc2l0aW9uX3N0YWNrKHZqdXN0ID0gMC41KSxzaXplPTMpCgpgYGAKYGBge3J9CiMgbXVsdGlwbGUgc2tpbGxzIHZzIHNhbGFyeSAKIyAyIHNraWxscyB2cyAzIHNraWxscyB2cyA0IHNraWxscwojIGhvdyBtYW55IHNraWxscyBpbiBlYWNoIHJvdyBhcmUgdHJ1ZSwgY3JlYXRlIG5ldyBjb2x1bW4gCmZpbHRlcl9za2lsbHMgPC0gZnVuY3Rpb24oZGF0YSwgc2tpbGxfY291bnQpIHsKICBkYXRhICU+JQogICAgcm93d2lzZSgpICU+JQogICAgbXV0YXRlKHNraWxsX2NvdW50ID0gc3VtKGNfYWNyb3NzKGMocHl0aG9uX3luLCBSX3luLCBzcGFyaywgYXdzLCBleGNlbCkpID09IDEpKSAlPiUKICAgIHVuZ3JvdXAoKSAKICAgIAp9CgoKICBkYXRhX3NraWxscyA8LSBmaWx0ZXJfc2tpbGxzKGRhdGFfMSkgJT4lCiAgICBmaWx0ZXIoc2tpbGxfY291bnQgPj0gMCAmIHNraWxsX2NvdW50IDw9IDUpCiAgZ2dwbG90KGRhdGFfc2tpbGxzLCBhZXMoeCA9IGZhY3Rvcihza2lsbF9jb3VudCksIHkgPSBhdmdfc2FsYXJ5KSkgKwogICAgZ2VvbV9ib3hwbG90KCkgKwogICAgdGhlbWVfbWluaW1hbCgpICsKICAgIGxhYnModGl0bGUgPSBwYXN0ZShpLCAiTnVtYmVyIG9mIHNraWxscyB2cyBTYWxhcnkiKSwgCiAgICAgICAgIHggPSAiTnVtYmVyIG9mIFNraWxscyIsIAogICAgICAgICB5ID0gIkF2ZXJhZ2UgU2FsYXJ5KHRob3VzYW5kcykiKSsKICB0aGVtZShwbG90LnRpdGxlID0gZWxlbWVudF90ZXh0KGhqdXN0ID0gMC41KSkKICAKCmBgYApgYGB7cn0KCmRh
dGFfcHJvY2Vzc2VkIDwtIGRhdGFfMyAlPiUKICBzZWxlY3QoUmF0aW5nLCBhdmdfc2FsYXJ5LCBqb2Jfc2ltcCwgam9iX3N0YXRlLCBweXRob25feW4sIFJfeW4sIHNwYXJrLCBhd3MsIGV4Y2VsLCBJbmR1c3RyeSwgUmV2ZW51ZSkKCiMgY29udmVydCBjYXRlZ29yaWNhbCB2YXJpYWJsZXMgdG8gZmFjdG9ycwpkYXRhX3Byb2Nlc3NlZCRqb2Jfc2ltcCA8LSBhcy5mYWN0b3IoZGF0YV9wcm9jZXNzZWQkam9iX3NpbXApCmRhdGFfcHJvY2Vzc2VkJGpvYl9zdGF0ZSA8LSBhcy5mYWN0b3IoZGF0YV9wcm9jZXNzZWQkam9iX3N0YXRlKQpkYXRhX3Byb2Nlc3NlZCRJbmR1c3RyeSA8LSBhcy5mYWN0b3IoZGF0YV9wcm9jZXNzZWQkSW5kdXN0cnkpCmRhdGFfcHJvY2Vzc2VkJFJldmVudWUgPC0gYXMuZmFjdG9yKGRhdGFfcHJvY2Vzc2VkJFJldmVudWUpCmRhdGFfcHJvY2Vzc2VkJHB5dGhvbl95biA8LSBhcy5mYWN0b3IoZGF0YV9wcm9jZXNzZWQkcHl0aG9uX3luKQpkYXRhX3Byb2Nlc3NlZCRSX3luIDwtIGFzLmZhY3RvcihkYXRhX3Byb2Nlc3NlZCRSX3luKQpkYXRhX3Byb2Nlc3NlZCRzcGFyayA8LSBhcy5mYWN0b3IoZGF0YV9wcm9jZXNzZWQkc3BhcmspCmRhdGFfcHJvY2Vzc2VkJGF3cyA8LSBhcy5mYWN0b3IoZGF0YV9wcm9jZXNzZWQkYXdzKQpkYXRhX3Byb2Nlc3NlZCRleGNlbCA8LSBhcy5mYWN0b3IoZGF0YV9wcm9jZXNzZWQkZXhjZWwpCgoKbW9kZWwgPC0gbG0oYXZnX3NhbGFyeSB+IFJhdGluZyArIGpvYl9zaW1wICsgam9iX3N0YXRlICsgcHl0aG9uX3luICsgUl95biArIHNwYXJrICsgYXdzICsgZXhjZWwgKyBJbmR1c3RyeSArIFJldmVudWUsIGRhdGEgPSBkYXRhX3Byb2Nlc3NlZCkKCgpzdW1tYXJ5KG1vZGVsKQoKYGBgCmBgYHtyfQojIFJlc2lkdWFscyB2cyBGaXR0ZWQKZ2dwbG90KG1vZGVsLCBhZXMoLmZpdHRlZCwgLnJlc2lkKSkgKwogIGdlb21fcG9pbnQoKSArCiAgZ2VvbV9zbW9vdGgoc2UgPSBGQUxTRSkgKwogIGxhYnModGl0bGUgPSAiUmVzaWR1YWxzIHZzIEZpdHRlZCIsIHggPSAiRml0dGVkIHZhbHVlcyIsIHkgPSAiUmVzaWR1YWxzIikgKwogIHRoZW1lX21pbmltYWwoKQoKIyBOb3JtYWwgUS1RCmdncGxvdChtb2RlbCwgYWVzKHNhbXBsZSA9IC5zdGRyZXNpZCkpICsKICBzdGF0X3FxKCkgKwogIHN0YXRfcXFfbGluZSgpICsKICBsYWJzKHRpdGxlID0gIk5vcm1hbCBRLVEiLCB4ID0gIlRoZW9yZXRpY2FsIFF1YW50aWxlcyIsIHkgPSAiU3RhbmRhcmRpemVkIFJlc2lkdWFscyIpICsKICB0aGVtZV9taW5pbWFsKCkKCiMgU2NhbGUtTG9jYXRpb24gKG9yIFNwcmVhZC1Mb2NhdGlvbikKZ2dwbG90KG1vZGVsLCBhZXMoLmZpdHRlZCwgc3FydChhYnMoLnN0ZHJlc2lkKSkpKSArCiAgZ2VvbV9wb2ludCgpICsKICBnZW9tX3Ntb290aChzZSA9IEZBTFNFKSArCiAgbGFicyh0aXRsZSA9ICJTY2FsZS1Mb2NhdGlvbiIsIHggPSAiRml0dGVkIHZhbHVlcyIsIHkgPSAiU3F1YXJlIFJvb3Qgb2YgU3RhbmRhcmRpemVkIFJl
c2lkdWFscyIpICsKICB0aGVtZV9taW5pbWFsKCkKCiMgUmVzaWR1YWxzIHZzIExldmVyYWdlCmdncGxvdChtb2RlbCwgYWVzKC5oYXQsIC5zdGRyZXNpZCkpICsKICBnZW9tX3BvaW50KCkgKwogIGdlb21fc21vb3RoKHNlID0gRkFMU0UpICsKICBsYWJzKHRpdGxlID0gIlJlc2lkdWFscyB2cyBMZXZlcmFnZSIsIHggPSAiTGV2ZXJhZ2UiLCB5ID0gIlN0YW5kYXJkaXplZCBSZXNpZHVhbHMiKSArCiAgdGhlbWVfbWluaW1hbCgpCgpgYGAKYGBge3J9CiMgcmF0aW5nIHZzIGF2ZXJhZ2Ugc2FsYXJ5CmdncGxvdChkYXRhX3Byb2Nlc3NlZCwgYWVzKHggPSBSYXRpbmcsIHkgPSBhdmdfc2FsYXJ5KSkgKwogIGdlb21fcG9pbnQoKSArCiAgZ2VvbV9zbW9vdGgobWV0aG9kID0gImxtIiwgc2UgPSBGQUxTRSwgY29sb3IgPSAiYmx1ZSIpICsKICBsYWJzKHRpdGxlID0gIkVmZmVjdCBvZiBSYXRpbmcgb24gQXZnIFNhbGFyeSIsIHggPSAiUmF0aW5nIiwgeSA9ICJBdmVyYWdlIFNhbGFyeSIpICsKICB0aGVtZV9taW5pbWFsKCkKCiMgam9iIHRpdGxlIHZzIHNhbGFyeQpnZ3Bsb3QoZGF0YV9wcm9jZXNzZWQsIGFlcyh4ID0gam9iX3NpbXAsIHkgPSBhdmdfc2FsYXJ5KSkgKwogIGdlb21fYm94cGxvdCgpICsKICBsYWJzKHRpdGxlID0gIkVmZmVjdCBvZiBKb2IgVGl0bGUgb24gU2FsYXJ5IiwgeCA9ICJKb2IgVGl0bGUiLCB5ID0gIkF2ZXJhZ2UgU2FsYXJ5IikgKwogIHRoZW1lX21pbmltYWwoKSArCiAgdGhlbWUoYXhpcy50ZXh0LnggPSBlbGVtZW50X3RleHQoYW5nbGUgPSA0NSwgaGp1c3QgPSAxKSkKCiMgam9iIHN0YXRlIHZzIHNhbGFyeSAKZ2dwbG90KGRhdGFfcHJvY2Vzc2VkLCBhZXMoeCA9IGpvYl9zdGF0ZSwgeSA9IGF2Z19zYWxhcnkpKSArCiAgZ2VvbV9ib3hwbG90KCkgKwogIGxhYnModGl0bGUgPSAiRWZmZWN0IG9mIEpvYiBTdGF0ZSBvbiBBdmcgU2FsYXJ5IiwgeCA9ICJKb2IgU3RhdGUiLCB5ID0gIkF2ZXJhZ2UgU2FsYXJ5IikgKwogIHRoZW1lX21pbmltYWwoKSArCiAgdGhlbWUoYXhpcy50ZXh0LnggPSBlbGVtZW50X3RleHQoYW5nbGUgPSA2MCwgaGp1c3QgPSAxKSkKCgoKYGBgCmBgYHtyfQojIHJlZ3Jlc3Npb24gcGxvdCB3aXRoIG51bWJlciBvZiBza2lsbHMgcmVxdXJlZCAKZ2dwbG90KGZpbHRlcmVkX2RhdGEsIGFlcyh4ID0gc2tpbGxfY291bnQsIHkgPSBhdmdfc2FsYXJ5KSkgKwogIGdlb21fcG9pbnQoKSArCiAgZ2VvbV9zbW9vdGgobWV0aG9kID0gImxtIiwgc2UgPSBGQUxTRSwgY29sb3IgPSAiYmx1ZSIpICsKICBsYWJzKHRpdGxlID0gIk51bWJlciBvZiBTa2lsbHMgdnMgU2FsYXJ5IiwgeCA9ICJOdW1iZXIgb2YgU2tpbGxzIiwgeSA9ICJBdmVyYWdlIFNhbGFyeSIpICsKICB0aGVtZV9taW5pbWFsKCkKYGBgCmBgYHtyfQp0YWJsZShkYXRhXzMkc2VuaW9yaXR5KQojIGdyb3VwIGJ5IGluZHVzdHJ5IGFuZCBzZW5pb2l0eSB0byBmaW5kIGF2ZXJhZ2Ugc2FsYXJ5IGZvciBlYWNoIHNlbmlvcml0eSBsZXZlbAppbmR1
c3RyeV9zYWxhcnkgPC0gZGF0YV8zICU+JQogIGdyb3VwX2J5KEluZHVzdHJ5LCBzZW5pb3JpdHkpICU+JQogIHN1bW1hcml6ZShhdmdfc2FsYXJ5ID0gbWVhbihhdmdfc2FsYXJ5LCBuYS5ybSA9IFRSVUUpKSAlPiUgdW5ncm91cCgpCgojIHRvcCAxMCBoaWdoZXN0IHNhbGFyaWVzIGZvciBzZW5pb3IgcG9zaXRpb25zCnRvcF9zZW5pb3IgPC0gaW5kdXN0cnlfc2FsYXJ5ICU+JQogIGZpbHRlcihzZW5pb3JpdHkgPT0gInNlbmlvciIpICU+JQogIGFycmFuZ2UoZGVzYyhhdmdfc2FsYXJ5KSkgJT4lIHNsaWNlX2hlYWQobj0xMCkKIyB0b3AgMTAgc2FsYXJpZXMgZm9yIGpyIG9yIG5vbiBzcGVjaWZpZWQgcG9zaXRpb25zCnRvcF9ub25fc2VuaW9yIDwtIGluZHVzdHJ5X3NhbGFyeSAlPiUKICBmaWx0ZXIoc2VuaW9yaXR5ID09ICJqciIgfCBzZW5pb3JpdHkgPT0gIm5hIiApICU+JQogIGFycmFuZ2UoZGVzYyhhdmdfc2FsYXJ5KSkgJT4lIHNsaWNlX2hlYWQobj0xMCkKCgppbmR1c3RyeV9zYWxhcnkgJT4lICBzbGljZSgzOjE4KSAlPiUKZ2dwbG90KGFlcyh4ID0gSW5kdXN0cnksIHkgPSBhdmdfc2FsYXJ5LCBmaWxsID0gc2VuaW9yaXR5KSkgKwogIGdlb21fYmFyKHN0YXQgPSAiaWRlbnRpdHkiLCBwb3NpdGlvbiA9ICJkb2RnZSIpICsKICBsYWJzKHRpdGxlID0gIlNhbGFyeSBieSBJbmR1c3RyeSBhbmQgU2VuaW9yaXR5IiwgeCA9ICJJbmR1c3RyeSIsIHkgPSAiQXZlcmFnZSBTYWxhcnkiKSArCiAgdGhlbWUoYXhpcy50ZXh0LnggPSBlbGVtZW50X3RleHQoYW5nbGUgPSA0NSwgaGp1c3QgPSAxKSkKCmBgYApgYGB7cn0KIyB0aGVyZSBpcyBvbmUgZW50cnkgaW4gY29sdW1uIGpvYl9zdGF0ZSB0aGF0IGhhcyBMb3MgQW5nZWxlcyBpbnN0ZWFkIG9mIENBLHNvIHdlIG5lZWQgdG8gZml4IHRoYXQKZGF0YV8xIDwtIGRhdGFfMSAlPiUKICBtdXRhdGUoam9iX3N0YXRlID0gaWZlbHNlKGpvYl9zdGF0ZSA9PSAiIExvcyBBbmdlbGVzIiB8IGpvYl9zdGF0ZSA9PSAiQ0EiLCAiIENBIiwgam9iX3N0YXRlKSkgCgojIGNoZWNrCnVuaXF1ZShkYXRhXzEkam9iX3N0YXRlKQpgYGAKCgoKCmBgYHtyfQpoaWdoZXN0X3NhbGFyeSA8LSBkYXRhXzEgJT4lCiAgYXJyYW5nZShkZXNjKGF2Z19zYWxhcnkpKQojIHBsb3Qgb2YgaGlnaGVzdCBzYWxhcmllcyBwZXIgU3RhdGUKaGlnaGVzdF9zYWxhcnkgJT4lIAogIGdncGxvdChhZXMoeCA9IGpvYl9zdGF0ZSwgeSA9IG1heF9zYWxhcnkpKSArCiAgZ2VvbV9iYXIoc3RhdCA9ICJpZGVudGl0eSIsIGZpbGwgPSAib3JhbmdlIikgICsKICBsYWJzKHRpdGxlID0gIkhpZ2hlc3QgU2FsYXJpZXMgYnkgU3RhdGUiLCB4ID0gIlN0YXRlIiwgeSA9ICJTYWxhcnkiKSArCiAgdGhlbWUoYXhpcy50ZXh0LnggPSBlbGVtZW50X3RleHQoYW5nbGUgPSA5MCwgaGp1c3QgPSAwLjUpKQoKIyBwbG90IG9mIGhpZ2hlc3Qgc2FsYXJpZXMgcGVyIENpdHkKaGlnaGVzdF9zYWxhcnkgJT4lIAogIHNsaWNlX2hlYWQobj0zMCkgJT4lCiAg
Z2dwbG90KCBhZXMoeCA9IExvY2F0aW9uLCB5ID0gbWF4X3NhbGFyeSkpICsKICBnZW9tX2JhcihzdGF0ID0gImlkZW50aXR5IiwgZmlsbCA9ICJwdXJwbGUiKSArCiAgbGFicyh0aXRsZSA9ICJIaWdoZXN0IFNhbGFyaWVzIGJ5IENpdHkiLCB4ID0gIlN0YXRlIiwgeSA9ICJTYWxhcnkiKSArCiAgdGhlbWUoYXhpcy50ZXh0LnggPSBlbGVtZW50X3RleHQoYW5nbGUgPSA0NSwgaGp1c3QgPSAxKSkKCmBgYApgYGB7cn0KdGFibGUoZGF0YV8zJFR5cGUub2Yub3duZXJzaGlwKQpmaWx0ZXJlZF9kYXRhIDwtIGRhdGFfMyAlPiUKICBmaWx0ZXIoVHlwZS5vZi5vd25lcnNoaXAgJWluJSBjKCJDb21wYW55IC0gUHJpdmF0ZSIsICJDb21wYW55IC0gUHVibGljIiwiR292ZXJubWVudCIsICJOb25wcm9maXQgT3JnYW5pemF0aW9uIikpICU+JQogIGdyb3VwX2J5KFR5cGUub2Yub3duZXJzaGlwKSAlPiUKICBzdW1tYXJpemUoYXZnX3NhbGFyeSA9IG1lYW4oYXZnX3NhbGFyeSkpCgpmaWx0ZXJlZF9kYXRhICU+JQpnZ3Bsb3QoYWVzKHggPSBUeXBlLm9mLm93bmVyc2hpcCwgeSA9IGF2Z19zYWxhcnksIGZpbGwgPSBUeXBlLm9mLm93bmVyc2hpcCkpICsKICBnZW9tX2JhcihzdGF0ID0gImlkZW50aXR5IikgKwogIGxhYnModGl0bGUgPSAiU2FsYXJ5IGJ5IE93bmVyc2hpcCBUeXBlIiwgeCA9ICJUeXBlIG9mIE93bmVyc2hpcCIsIHkgPSAiQXZlcmFnZSBTYWxhcnkiKSArCiAgdGhlbWVfbWluaW1hbCgpICsKICB0aGVtZShheGlzLnRleHQueCA9IGVsZW1lbnRfdGV4dChhbmdsZSA9IDQ1LCBoanVzdCA9IDEpKQoKYGBgCmBgYHtyfQojIGF2ZXJhZ2UgUGF5IHBlciBTdGF0ZSAoZm9yIDIwIFN0YXRlcykKZGF0YV8xICU+JQogIGdyb3VwX2J5KGpvYl9zdGF0ZSkgJT4lCiAgc3VtbWFyaXplKGF2Z19zYWxhcnkgPSBtZWFuKGF2Z19zYWxhcnkpKSAlPiUKICBhcnJhbmdlKGRlc2MoYXZnX3NhbGFyeSkpICU+JQogIHNsaWNlX2hlYWQobj0yMCkgJT4lCiAgZ2dwbG90KGFlcyh4ID0gcmVvcmRlcihqb2Jfc3RhdGUsIGF2Z19zYWxhcnkpLCB5ID0gYXZnX3NhbGFyeSwgZmlsbCA9IGpvYl9zdGF0ZSkpICsKICBnZW9tX2JhcihzdGF0ID0gImlkZW50aXR5IikgKwogIGxhYnModGl0bGUgPSAiQXZlcmFnZSBTYWxhcnkgYnkgU3RhdGUiLCB4ID0gIlN0YXRlIiwgeSA9ICJBdmVyYWdlIFNhbGFyeSIpICsKICB0aGVtZV9taW5pbWFsKCkgKwogIHRoZW1lKGF4aXMudGV4dC54ID0gZWxlbWVudF90ZXh0KGFuZ2xlID0gOTAsIGhqdXN0ID0gMSkpCgpgYGAKYGBge3J9CiMgcmVtb3ZlIHN0YXRlcyB3aXRoIDEgZW50cnkgb2Ygd2l0aCByZXBlYXRpbmcgZW50cmllcwpzdGF0ZV9kYXRhIDwtIGRhdGFfMSAlPiUKICAgIGZpbHRlcighKGpvYl9zdGF0ZSAlaW4lIGMoIiBLUyIsICIgREUiLCAiIFNDIiwgIiBSSSIpKSkgCiMgZnVuY3Rpb24gdGhhdCBjcmVhdGVzIHZpb2xpbiBwbG90cyBmb3IgZWFjaCBzdGF0ZQpzdGF0ZV9zYWxhcnkgPC0gZnVuY3Rpb24oc3RhdGVfbmFt
ZSkgewogIHN0YXRlX2RhdGEgPC0gZGF0YV8xICU+JQogICAgZmlsdGVyKGpvYl9zdGF0ZSA9PSBzdGF0ZV9uYW1lKSAKCiAgIGdncGxvdChzdGF0ZV9kYXRhLCBhZXMoeCA9IGpvYl9zdGF0ZSwgeSA9IGF2Z19zYWxhcnksIGZpbGwgPSBqb2Jfc3RhdGUpKSArCiAgICBnZW9tX3Zpb2xpbigpICsKICAgIGxhYnModGl0bGUgPSBwYXN0ZSgiU2FsYXJ5IFJhbmdlIGluIiwgc3RhdGVfbmFtZSksIHggPSAiU3RhdGUiLCB5ID0gIlNhbGFyeSIpICsKICAgIHRoZW1lX21pbmltYWwoKSArCiAgICB0aGVtZShheGlzLnRleHQueCA9IGVsZW1lbnRfdGV4dChhbmdsZSA9IDQ1LCBoanVzdCA9IDEpKSAKCn0KIyBzZXBhcmF0ZSBwbG90cyBmb3IgZWFjaCBzdGF0ZQpzdGF0ZV9wbG90cyA8LSBsYXBwbHkodW5pcXVlKHN0YXRlX2RhdGEkam9iX3N0YXRlKSwgc3RhdGVfc2FsYXJ5KQpzdGF0ZV9wbG90cwpgYGAKYGBge3J9CiMgY2hlY2sgd2h5IGNlcnRhaW4gcGxvdHMgZGlkbid0IHdvcmsgCndyb25nX2VudHJpZXMgPC0gZGF0YV8xICU+JQogIGZpbHRlcihqb2Jfc3RhdGUgPT0gIiBLUyIgfCBqb2Jfc3RhdGUgPT0iIERFIiB8am9iX3N0YXRlID09ICIgU0MiIHxqb2Jfc3RhdGUgPT0gIiBSSSIpCnByaW50KHdyb25nX2VudHJpZXMpCgpgYGAKYGBge3J9CiMgY2hlY2sgYWxsIHRoZSB1bmlxdWUgdmFsdWVzIGZvciBSZXZlbnVlIAojIHRhYmxlKGRhdGFfMyRSZXZlbnVlKQoKIyByZXZlbnVlIGNhdGVnb3JpZXMgd2l0aCB0aGUgbW9zdCBlbnRyaWVzIApyZXZlbnVlX2NhdGVnb3JpZXMgPC0gYygKICAiJDUwIHRvICQxMDAgbWlsbGlvbiAoVVNEKSIsCiAgIiQxMDAgdG8gJDUwMCBtaWxsaW9uIChVU0QpIiwKICAiJDUwMCBtaWxsaW9uIHRvICQxIGJpbGxpb24gKFVTRCkiLAogICIkMSB0byAkMiBiaWxsaW9uIChVU0QpIiwKICAiJDEwKyBiaWxsaW9uIChVU0QpIgopCgojIGZpbHRlciBkYXRhIGFuZCBjcmVhdGUgcGxvdCAKZGF0YV8zICU+JSBmaWx0ZXIoUmV2ZW51ZSAlaW4lIHJldmVudWVfY2F0ZWdvcmllcykgJT4lCiAgZ2dwbG90KGFlcyh4ID0gUmV2ZW51ZSwgeSA9IGF2Z19zYWxhcnksIGZpbGwgPSBSZXZlbnVlKSkgKwogIGdlb21fYm94cGxvdCgpICsKICBsYWJzKAogICAgdGl0bGUgPSAiU2FsYXJ5IERpc3RyaWJ1dGlvbiBieSBDb21wYW55IFJldmVudWUiLAogICAgeCA9ICJDb21wYW55IFJldmVudWUiLAogICAgeSA9ICJBdmVyYWdlIFNhbGFyeSIKICApICsKICB0aGVtZV9taW5pbWFsKCkgKwogIHRoZW1lKGF4aXMudGV4dC54ID0gZWxlbWVudF90ZXh0KGFuZ2xlID0gNDUsIGhqdXN0ID0gMSksCiAgICAgICAgIHBsb3QudGl0bGUgPSBlbGVtZW50X3RleHQoc2l6ZSA9IDEyLCBmYWNlID0gImJvbGQiLCB2anVzdCA9IDIpLAogICAgICAgIHBsb3QubWFyZ2luID0gbWFyZ2luKDUsMCwwLDEwKSwKICAgICAgICBsZWdlbmQudGV4dCA9IGVsZW1lbnRfdGV4dChzaXplID0gOSkKICAgICAgKQpgYGAKYGBge3J9CgoKYGBgCgoKYGBge3J9CnVu
aXF1ZShkYXRhXzQkd29ya195ZWFyKQp0YWJsZShkYXRhXzEkU2l6ZSkKdGFibGUoZGF0YV80JHdvcmtfeWVhcikKYGBgCmBgYHtyfQoKYGBgCgpgYGB7cn0KCmBgYAoKCgpgYGB7cn0KdGFibGUoZGF0YV8zJFJldmVudWUpCgpgYGAKCmBgYHtyfSAKIyBmaW5kIGFsbCB0aGUgdW5pcXVlIGpvYiB0aXRsZXMgZnJvbSBkYXRhXzMKdW5pcXVlKGRhdGFfMyRqb2Jfc2ltcCkKIyBzZWUgaG93IG1hbnkgb2YgZWFjaCB1bmlxdWUgam9iIHRpdGxlIHRoZXJlIGFyZSAKdGFibGUoZGF0YV8zJGpvYl9zaW1wKQpgYGAKCgoKYGBge3J9CgpgYGAKCmBgYHtyfQoKYGBgCgpgYGB7cn0KCmBgYAoKYGBge3J9CgpgYGAKCmBgYHtyfQoKYGBgCgpgYGB7cn0KCmBgYAoK